*-----------------------------------------------------------------
* Session setup: pin the interpreter version, start from an empty
* workspace, change to the project folder and open the log file.
*-----------------------------------------------------------------
version 11
set more off
capture clear mata
capture clear
capture log close
cd "E:\REStat_MS14767_Vol96(2)\Data preparation Compustat segment"
log using "8_links.log", replace

* Memory / matrix-size settings; any error they raise is ignored (capture)
capture set mem 1000m
capture set mat 2000


use "raw_rjv_panel_america.dta", clear

*************
* Attach the Compustat segment file to every firm-year
* (original note: it supplies SIC4, market shares and HHI of the firms)
*************

sort ticker year
merge m:1 ticker year using "segment_wide.dta"
tabulate _merge

*************
* Keep matched firm-years only:
*   _merge==2  firms in compustat which are not in the RJV database
*   _merge==1  firms in the database for which we have no MS information
*************

keep if _merge==3
drop _merge

**************
* Drop all observations where the firm was not yet an insider
**************

keep if ins!=0

**********************
* Keep only the relevant variables
**********************

keep rjvname entryname entityname ticker year SIC* sic4 MS* comnum rjvnum

* Replace the firm and RJV identifiers by consecutive group numbers
* built from the names (firm id first, RJV id second)
drop comnum rjvnum
egen comnum = group(entityname)
egen rjvnum = group(rjvname)

* original note: the maximum number of industries is 459
summarize year
* original note: the maximum number of years is 14



*********************************************************************************************************************
*********************************************************************************************************************
****
****                     Here we count the links with firms from any industry
****
*********************************************************************************************************************
*********************************************************************************************************************

sort year comnum rjvnum

* seq     : position of the observation inside its year, in the sort order
*           above. The Mata code further below addresses column k of the
*           f-matrix as "the k-th row of the group", so seq has to be the
*           row position; it is therefore generated from _n directly rather
*           than through egen seq(), whose within-group order is not tied
*           to the current sort order.
* seq_SIC : running number of the year (group id used in Mata)
* S       : number of observations in the year
by year: generate seq = _n
egen seq_SIC = group(year)
egen S = max(seq), by(year)

* empty variables for the link measures, which are filled from Mata

generate measure1 = .
generate measure2 = .

* For every within-year position k build f<k>: the firm id (comnum) of each
* observation that belongs to the same RJV as the k-th observation of its
* year, excluding that k-th observation itself; missing otherwise.
* The f-variables must remain the last variables of the dataset because
* Mata picks them up by position.
summarize S
local n_slots = r(max)
forvalues k = 1/`n_slots' {
	quietly generate own_rjv = rjvnum if seq == `k'
	quietly egen slot_rjv = max(own_rjv), by(year)
	quietly generate f`k' = comnum if slot_rjv == rjvnum & slot_rjv != own_rjv
	drop own_rjv slot_rjv
}



**** here we go into mata: count, for every firm and year, the links to other firms

clear mata
mata

/* identifiers of every observation, in dataset order */
rjv     = st_data(., "rjvnum")
comnum  = st_data(., "comnum")
seq_SIC = st_data(., "seq_SIC")
S       = st_data(., "S")
n_var   = st_nvar()

/* the f-variables were generated last, so they are the final max(S)
   variables of the dataset; st_data() returns a regular copy of them */
s     = n_var - max(S) + 1
index = (s::n_var)'
f     = st_data(., index)

/* one result per observation; each group writes into its own rows, so the
   result does not depend on the groups being stored contiguously */
n_obs = st_nobs()
measure1_allfirm = J(n_obs, 1, .)
measure2_allfirm = J(n_obs, 1, .)

max_SIC = max(seq_SIC)

for (i=1; i<=max_SIC; i++) {

	/* rows of group i and the firms observed in it */
	rowindex    = mm_which(seq_SIC:==i)
	comnum_rel  = comnum[rowindex]
	nr_rows     = length(comnum_rel)
	uniquefirms = uniqrows(comnum_rel)
	nr_unique   = length(uniquefirms)

	/* default: no links (also the result when the group holds one firm only) */
	measurevect1 = J(nr_rows, 1, 0)
	measurevect2 = J(nr_rows, 1, 0)

	if (nr_unique>1) {

		for (j=1; j<=nr_unique; j++) {

			firm_rel      = uniquefirms[j]
			firm_position = mm_which(comnum_rel:==firm_rel)
			nr_firm_entry = length(firm_position)

			/* rows: all observations of the group
			   columns: the f-columns belonging to the entries of this firm */
			partner = f[rowindex, firm_position]

			/* per row: the firm id of that row if it shares an RJV with any
			   entry of the firm; rowsum() of the single column turns a
			   missing value into 0 */
			partner_id = rowsum(rowmax(partner))

			/* theoretically this step is not necessary; it makes sure that
			   links of the firm with itself are not counted */
			notself     = mm_which(partner_id:!=firm_rel)
			measurevect = partner_id[notself]
			nonzero_pos = mm_which(measurevect:>0)

			/* measure1: linked observations, measure2: distinct linked firms */
			measure1 = length(measurevect[nonzero_pos])
			measure2 = length(uniqrows(measurevect[nonzero_pos]))

			measurevect1[firm_position] = J(nr_firm_entry, 1, measure1)
			measurevect2[firm_position] = J(nr_firm_entry, 1, measure2)
		}
	}

	measure1_allfirm[rowindex] = measurevect1
	measure2_allfirm[rowindex] = measurevect2
}

st_store(., "measure1", measure1_allfirm)
st_store(., "measure2", measure2_allfirm)

end


rename measure1 links1_tot
label var links1_tot "Total direct links with double counting-RJV"
rename measure2 links2_tot
label var links2_tot "Total direct links without double counting-RJV"





*********************************************************************************************************************
*********************************************************************************************************************
****
****                     Here we count the links with firms from the same industry, i.e. direct competitors
****
*********************************************************************************************************************
*********************************************************************************************************************
drop f*

* one observation per RJV / firm / year / segment slot; empty slots are removed
reshape long SIC MS, i(rjvnum ticker year) j(seq_sic)
drop if SIC ==.
drop seq seq_SIC S
sort year SIC comnum rjvnum

* seq     : position of the observation inside its year-SIC cell, in the sort
*           order above. The Mata code addresses column k of the f-matrix as
*           "the k-th row of the cell", so seq is generated from _n directly
*           rather than through egen seq(), whose within-group order is not
*           tied to the current sort order.
* seq_SIC : running number of the year-SIC cell (group id used in Mata)
* S       : number of observations in the cell
by year SIC: generate seq = _n
egen seq_SIC = group(year SIC)
egen S = max(seq), by(year SIC)

* For every within-cell position k build f<k>: the firm id (comnum) of each
* observation that belongs to the same RJV as the k-th observation of its
* year-SIC cell, excluding that k-th observation itself; missing otherwise.
* The f-variables must remain the last variables of the dataset because
* Mata picks them up by position.
summarize S
local n_slots = r(max)
forvalues k = 1/`n_slots' {
	quietly generate own_rjv = rjvnum if seq == `k'
	quietly egen slot_rjv = max(own_rjv), by(year SIC)
	quietly generate f`k' = comnum if slot_rjv == rjvnum & slot_rjv != own_rjv
	drop own_rjv slot_rjv
}






	******************************************************************************
	**** here we put the f matrix (and the identifiers) into mata

			mata

			rjv = st_data(. ,("rjvnum"))
			comnum = st_data(. ,("comnum"))
			seq_SIC= st_data(. , ("seq_SIC"))
			S= st_data(. , ("S"))
			n_var =  st_nvar()

			/* The f-variables are the last max(S) variables of the dataset.
			   They are dropped from the dataset right after this block, so f
			   has to be a regular matrix holding its own copy of the values:
			   st_data() returns such a copy, whereas a view would keep
			   pointing at variable positions of the dataset. */
			s = n_var - max(S)+1
			index=(s::n_var)'
			f = st_data(., index)

			end
	******************************************************************************
	******************************************************************************
* the f-variables were read into Mata above and are no longer needed here
drop f*

* placeholders for the four measures, filled from Mata further below
forvalues m = 1/4 {
	generate measure`m' = .
}

* For every within-cell position k build ms<k>: the market share (MS) of each
* observation that belongs to the same RJV as the k-th observation of its
* year-SIC cell, excluding that k-th observation itself; missing otherwise.
summarize S
local n_slots = r(max)
forvalues k = 1/`n_slots' {
	quietly generate own_rjv = rjvnum if seq == `k'
	quietly egen slot_rjv = max(own_rjv), by(year SIC)
	quietly generate ms`k' = MS if slot_rjv == rjvnum & slot_rjv != own_rjv
	drop own_rjv slot_rjv
}
		

		
		********************************************************************************************
		********************************************************************************************
		* Since the market shares do not necessarily identify each firm uniquely within a SIC code,
		* the number of links and the aggregated market share of the linked firms are computed
		* together: the firm identifier (comnum) and the market share are kept side by side, so
		* that the list of unique links is determined by the firm identifier.
		********************************************************************************************
		********************************************************************************************


		**** here we put the ms matrix into mata

	mata

		/* the ms-variables were generated last, so they are the final max(S)
		   variables of the dataset; st_data() returns a regular copy.
		   S, seq_SIC, comnum and f were loaded in the preceding mata block. */
		n_var = st_nvar()
		s     = n_var - max(S) + 1
		index = (s::n_var)'
		ms    = st_data(., index)

		/* one result per observation; each cell writes into its own rows, so
		   the result does not depend on the cells being stored contiguously */
		n_obs = st_nobs()
		measure1_allfirm = J(n_obs, 1, .)
		measure2_allfirm = J(n_obs, 1, .)
		measure3_allfirm = J(n_obs, 1, .)
		measure4_allfirm = J(n_obs, 1, .)

		max_SIC = max(seq_SIC)

		for (i=1; i<=max_SIC; i++) {

			/* rows of year-SIC cell i and the firms observed in it */
			rowindex    = mm_which(seq_SIC:==i)
			comnum_rel  = comnum[rowindex]
			nr_rows     = length(comnum_rel)
			uniquefirms = uniqrows(comnum_rel)
			nr_unique   = length(uniquefirms)

			/* default: no links (also the result when the cell holds one firm only) */
			measurevect1 = J(nr_rows, 1, 0)
			measurevect2 = J(nr_rows, 1, 0)
			measurevect3 = J(nr_rows, 1, 0)
			measurevect4 = J(nr_rows, 1, 0)

			if (nr_unique>1) {

				for (j=1; j<=nr_unique; j++) {

					firm_rel      = uniquefirms[j]
					firm_position = mm_which(comnum_rel:==firm_rel)
					nr_firm_entry = length(firm_position)

					/* rows: all observations of the cell
					   columns: the columns belonging to the entries of this firm */
					partner_f  = f[rowindex, firm_position]
					partner_ms = ms[rowindex, firm_position]

					/* per row: firm id / market share of that row if it shares an
					   RJV with any entry of the firm; rowsum() of the single
					   column turns a missing value into 0 */
					partner_id    = rowsum(rowmax(partner_f))
					partner_share = rowsum(rowmax(partner_ms))

					/* theoretically this step is not necessary; it makes sure
					   that links of the firm with itself are not counted */
					notself       = mm_which(partner_id:!=firm_rel)
					measurevect01 = partner_id[notself]
					measurevect02 = partner_share[notself]

					nonzero_pos = mm_which(measurevect01:>0)

					/* firm id and market share side by side: the unique links
					   are identified through the firm id */
					measuremat1 = (measurevect01[nonzero_pos], measurevect02[nonzero_pos])
					measuremat2 = uniqrows(measuremat1)

					/* 1: linked observations      2: distinct (firm, share) links
					   3: share summed over 1      4: share summed over 2 */
					measure1 = rows(measuremat1)
					measure2 = rows(measuremat2)
					measure3 = colsum(measuremat1[.,2])
					measure4 = colsum(measuremat2[.,2])

					measurevect1[firm_position] = J(nr_firm_entry, 1, measure1)
					measurevect2[firm_position] = J(nr_firm_entry, 1, measure2)
					measurevect3[firm_position] = J(nr_firm_entry, 1, measure3)
					measurevect4[firm_position] = J(nr_firm_entry, 1, measure4)
				}
			}

			measure1_allfirm[rowindex] = measurevect1
			measure2_allfirm[rowindex] = measurevect2
			measure3_allfirm[rowindex] = measurevect3
			measure4_allfirm[rowindex] = measurevect4
		}

		st_store(., "measure1", measure1_allfirm)
		st_store(., "measure2", measure2_allfirm)
		st_store(., "measure3", measure3_allfirm)
		st_store(., "measure4", measure4_allfirm)

end


rename measure1 links1
label var links1 "Direct links with double counting"
rename measure2 links2
label var links2 "Direct links without double counting"
rename measure3 links_MS1_tot
label var links_MS1_tot "Market share of total direct links with double counting-RJV"
rename measure4 links_MS2_tot
label var links_MS2_tot "Market share of total direct links without double counting-RJV"







*********************************************************************************************************************
*********************************************************************************************************************
****
****                     Here we count the links with firms from the same industry
****                     which are in a RJV whose SIC4 code is the same as that of the firm
****
*********************************************************************************************************************
*********************************************************************************************************************
drop ms*

sort year SIC comnum rjvnum

* For every within-cell position k build f<k>: the firm id (comnum) of each
* observation with SIC equal to sic4 that belongs to the same RJV as the k-th
* observation of its year-SIC cell, excluding that k-th observation itself;
* missing otherwise. The f-variables end up as the last variables of the dataset.
summarize S
local n_slots = r(max)
forvalues k = 1/`n_slots' {
	quietly generate own_rjv = rjvnum if seq == `k'
	quietly egen slot_rjv = max(own_rjv), by(year SIC)
	quietly generate f`k' = comnum if slot_rjv == rjvnum & slot_rjv != own_rjv & SIC == sic4
	drop own_rjv slot_rjv
}



******************************************************************************
	**** here we put the f matrix (and the identifiers) into mata

			mata

			rjv = st_data(. ,("rjvnum"))
			comnum = st_data(. ,("comnum"))
			seq_SIC= st_data(. , ("seq_SIC"))
			S= st_data(. , ("S"))
			n_var =  st_nvar()

			/* The f-variables are the last max(S) variables of the dataset.
			   They are dropped from the dataset right after this block, so f
			   has to be a regular matrix holding its own copy of the values:
			   st_data() returns such a copy, whereas a view would keep
			   pointing at variable positions of the dataset. */
			s = n_var - max(S)+1
			index=(s::n_var)'
			f = st_data(., index)

			end
	******************************************************************************
	******************************************************************************
* the f-variables were read into Mata above and are no longer needed here
drop f*

* placeholders for the four measures, filled from Mata further below
forvalues m = 1/4 {
	generate measure`m' = .
}

* For every within-cell position k build ms<k>: the market share (MS) of each
* observation that belongs to the same RJV as the k-th observation of its
* year-SIC cell, excluding that k-th observation itself; missing otherwise.
* (No SIC/sic4 condition is applied to the market-share columns here.)
summarize S
local n_slots = r(max)
forvalues k = 1/`n_slots' {
	quietly generate own_rjv = rjvnum if seq == `k'
	quietly egen slot_rjv = max(own_rjv), by(year SIC)
	quietly generate ms`k' = MS if slot_rjv == rjvnum & slot_rjv != own_rjv
	drop own_rjv slot_rjv
}
		

		
		********************************************************************************************
		********************************************************************************************
		* Since the market shares do not necessarily identify each firm uniquely within a SIC code,
		* the number of links and the aggregated market share of the linked firms are computed
		* together: the firm identifier (comnum) and the market share are kept side by side, so
		* that the list of unique links is determined by the firm identifier.
		********************************************************************************************
		********************************************************************************************


		**** here we put the ms matrix into mata

	mata

		/* the ms-variables were generated last, so they are the final max(S)
		   variables of the dataset; st_data() returns a regular copy.
		   S, seq_SIC, comnum and f were loaded in the preceding mata block. */
		n_var = st_nvar()
		s     = n_var - max(S) + 1
		index = (s::n_var)'
		ms    = st_data(., index)

		/* one result per observation; each cell writes into its own rows, so
		   the result does not depend on the cells being stored contiguously */
		n_obs = st_nobs()
		measure1_allfirm = J(n_obs, 1, .)
		measure2_allfirm = J(n_obs, 1, .)
		measure3_allfirm = J(n_obs, 1, .)
		measure4_allfirm = J(n_obs, 1, .)

		max_SIC = max(seq_SIC)

		for (i=1; i<=max_SIC; i++) {

			/* rows of year-SIC cell i and the firms observed in it */
			rowindex    = mm_which(seq_SIC:==i)
			comnum_rel  = comnum[rowindex]
			nr_rows     = length(comnum_rel)
			uniquefirms = uniqrows(comnum_rel)
			nr_unique   = length(uniquefirms)

			/* default: no links (also the result when the cell holds one firm only) */
			measurevect1 = J(nr_rows, 1, 0)
			measurevect2 = J(nr_rows, 1, 0)
			measurevect3 = J(nr_rows, 1, 0)
			measurevect4 = J(nr_rows, 1, 0)

			if (nr_unique>1) {

				for (j=1; j<=nr_unique; j++) {

					firm_rel      = uniquefirms[j]
					firm_position = mm_which(comnum_rel:==firm_rel)
					nr_firm_entry = length(firm_position)

					/* rows: all observations of the cell
					   columns: the columns belonging to the entries of this firm */
					partner_f  = f[rowindex, firm_position]
					partner_ms = ms[rowindex, firm_position]

					/* per row: firm id / market share of that row if it shares an
					   RJV with any entry of the firm; rowsum() of the single
					   column turns a missing value into 0 */
					partner_id    = rowsum(rowmax(partner_f))
					partner_share = rowsum(rowmax(partner_ms))

					/* theoretically this step is not necessary; it makes sure
					   that links of the firm with itself are not counted */
					notself       = mm_which(partner_id:!=firm_rel)
					measurevect01 = partner_id[notself]
					measurevect02 = partner_share[notself]

					nonzero_pos = mm_which(measurevect01:>0)

					/* firm id and market share side by side: the unique links
					   are identified through the firm id */
					measuremat1 = (measurevect01[nonzero_pos], measurevect02[nonzero_pos])
					measuremat2 = uniqrows(measuremat1)

					/* 1: linked observations      2: distinct (firm, share) links
					   3: share summed over 1      4: share summed over 2 */
					measure1 = rows(measuremat1)
					measure2 = rows(measuremat2)
					measure3 = colsum(measuremat1[.,2])
					measure4 = colsum(measuremat2[.,2])

					measurevect1[firm_position] = J(nr_firm_entry, 1, measure1)
					measurevect2[firm_position] = J(nr_firm_entry, 1, measure2)
					measurevect3[firm_position] = J(nr_firm_entry, 1, measure3)
					measurevect4[firm_position] = J(nr_firm_entry, 1, measure4)
				}
			}

			measure1_allfirm[rowindex] = measurevect1
			measure2_allfirm[rowindex] = measurevect2
			measure3_allfirm[rowindex] = measurevect3
			measure4_allfirm[rowindex] = measurevect4
		}

		st_store(., "measure1", measure1_allfirm)
		st_store(., "measure2", measure2_allfirm)
		st_store(., "measure3", measure3_allfirm)
		st_store(., "measure4", measure4_allfirm)

end
rename measure1 links1_same
label var links1_same "Direct links with double counting-RJV, Firm-SIC = RJV-SIC"
rename measure2 links2_same
label var links2_same "Direct links without double counting-RJV, Firm-SIC = RJV-SIC"
rename measure3 links_MS1_same
label var links_MS1_same "Market share of direct links with double counting-RJV, Firm-SIC = RJV-SIC"
rename measure4 links_MS2_same
label var links_MS2_same "Market share of direct links without double counting-RJV, Firm-SIC = RJV-SIC"

*********************************************************************************************************************
*********************************************************************************************************************
****
****                     Here we count the links with firms from the same industry
****                     which are in a RJV whose SIC4 code is not the same as that of the firm
****
*********************************************************************************************************************
*********************************************************************************************************************
drop ms*

sort year SIC comnum rjvnum

* For every within-cell position k build f<k>: the firm id (comnum) of each
* observation with SIC different from sic4 that belongs to the same RJV as the
* k-th observation of its year-SIC cell, excluding that k-th observation
* itself; missing otherwise. The f-variables end up as the last variables of
* the dataset.
summarize S
local n_slots = r(max)
forvalues k = 1/`n_slots' {
	quietly generate own_rjv = rjvnum if seq == `k'
	quietly egen slot_rjv = max(own_rjv), by(year SIC)
	quietly generate f`k' = comnum if slot_rjv == rjvnum & slot_rjv != own_rjv & SIC != sic4
	drop own_rjv slot_rjv
}

******************************************************************************
	**** here we put the f matrix (and the identifiers) into mata

			mata

			rjv = st_data(. ,("rjvnum"))
			comnum = st_data(. ,("comnum"))
			seq_SIC= st_data(. , ("seq_SIC"))
			S= st_data(. , ("S"))
			n_var =  st_nvar()

			/* The f-variables are the last max(S) variables of the dataset.
			   They are dropped from the dataset right after this block, so f
			   has to be a regular matrix holding its own copy of the values:
			   st_data() returns such a copy, whereas a view would keep
			   pointing at variable positions of the dataset. */
			s = n_var - max(S)+1
			index=(s::n_var)'
			f = st_data(., index)

			end
******************************************************************************
******************************************************************************
* the f-variables were read into Mata above and are no longer needed here
drop f*

* empty variables for the link measures, which are filled from Mata
forvalues m = 1/4 {
	generate measure`m' = .
}

* For every within-cell position k build ms<k>: the market share (MS) of each
* observation with SIC different from sic4 that belongs to the same RJV as the
* k-th observation of its year-SIC cell, excluding that k-th observation
* itself; missing otherwise.
summarize S
local n_slots = r(max)
forvalues k = 1/`n_slots' {
	quietly generate own_rjv = rjvnum if seq == `k'
	quietly egen slot_rjv = max(own_rjv), by(year SIC)
	quietly generate ms`k' = MS if slot_rjv == rjvnum & slot_rjv != own_rjv & SIC != sic4
	drop own_rjv slot_rjv
}


**************************************************************************************************
**** here we put the ms matrix into mata

	mata

		/* the ms-variables were generated last, so they are the final max(S)
		   variables of the dataset; st_data() returns a regular copy.
		   S, seq_SIC, comnum and f were loaded in the preceding mata block. */
		n_var = st_nvar()
		s     = n_var - max(S) + 1
		index = (s::n_var)'
		ms    = st_data(., index)

		/* one result per observation; each cell writes into its own rows, so
		   the result does not depend on the cells being stored contiguously */
		n_obs = st_nobs()
		measure1_allfirm = J(n_obs, 1, .)
		measure2_allfirm = J(n_obs, 1, .)
		measure3_allfirm = J(n_obs, 1, .)
		measure4_allfirm = J(n_obs, 1, .)

		max_SIC = max(seq_SIC)

		for (i=1; i<=max_SIC; i++) {

			/* rows of year-SIC cell i and the firms observed in it */
			rowindex    = mm_which(seq_SIC:==i)
			comnum_rel  = comnum[rowindex]
			nr_rows     = length(comnum_rel)
			uniquefirms = uniqrows(comnum_rel)
			nr_unique   = length(uniquefirms)

			/* default: no links (also the result when the cell holds one firm only) */
			measurevect1 = J(nr_rows, 1, 0)
			measurevect2 = J(nr_rows, 1, 0)
			measurevect3 = J(nr_rows, 1, 0)
			measurevect4 = J(nr_rows, 1, 0)

			if (nr_unique>1) {

				for (j=1; j<=nr_unique; j++) {

					firm_rel      = uniquefirms[j]
					firm_position = mm_which(comnum_rel:==firm_rel)
					nr_firm_entry = length(firm_position)

					/* rows: all observations of the cell
					   columns: the columns belonging to the entries of this firm */
					partner_f  = f[rowindex, firm_position]
					partner_ms = ms[rowindex, firm_position]

					/* per row: firm id / market share of that row if it shares an
					   RJV with any entry of the firm; rowsum() of the single
					   column turns a missing value into 0 */
					partner_id    = rowsum(rowmax(partner_f))
					partner_share = rowsum(rowmax(partner_ms))

					/* theoretically this step is not necessary; it makes sure
					   that links of the firm with itself are not counted */
					notself       = mm_which(partner_id:!=firm_rel)
					measurevect01 = partner_id[notself]
					measurevect02 = partner_share[notself]

					nonzero_pos = mm_which(measurevect01:>0)

					/* firm id and market share side by side: the unique links
					   are identified through the firm id */
					measuremat1 = (measurevect01[nonzero_pos], measurevect02[nonzero_pos])
					measuremat2 = uniqrows(measuremat1)

					/* 1: linked observations      2: distinct (firm, share) links
					   3: share summed over 1      4: share summed over 2 */
					measure1 = rows(measuremat1)
					measure2 = rows(measuremat2)
					measure3 = colsum(measuremat1[.,2])
					measure4 = colsum(measuremat2[.,2])

					measurevect1[firm_position] = J(nr_firm_entry, 1, measure1)
					measurevect2[firm_position] = J(nr_firm_entry, 1, measure2)
					measurevect3[firm_position] = J(nr_firm_entry, 1, measure3)
					measurevect4[firm_position] = J(nr_firm_entry, 1, measure4)
				}
			}

			measure1_allfirm[rowindex] = measurevect1
			measure2_allfirm[rowindex] = measurevect2
			measure3_allfirm[rowindex] = measurevect3
			measure4_allfirm[rowindex] = measurevect4
		}

		st_store(., "measure1", measure1_allfirm)
		st_store(., "measure2", measure2_allfirm)
		st_store(., "measure3", measure3_allfirm)
		st_store(., "measure4", measure4_allfirm)

end
**************************************************************************************

rename measure1 links1_diff
label var links1_diff "Direct links with double counting-RJV, Firm-SIC is not RJV-SIC"
rename measure2 links2_diff
label var links2_diff "Direct links without double counting-RJV in same SIC, Firm-SIC is not RJV-SIC"
rename measure3 links_MS1_diff
label var links_MS1_diff "Market share of direct links with double counting-RJV, Firm-SIC is not RJV-SIC"
rename measure4 links_MS2_diff
* the label below is worded with "w/o" so that it stays within the 80-character
* limit of variable labels (the wording with "without" is 81 characters long)
label var links_MS2_diff "Market share of direct links w/o double counting-RJV, Firm-SIC is not RJV-SIC"

drop ms*

* observation-level file with all link measures
save "links_big.dta", replace

* reduce to one observation per year / firm / SIC cell and keep the
* identifiers together with all link measures
egen s = seq(), by(year comnum SIC)
keep if s == 1
keep ticker year SIC links*
sort ticker year

describe

save links.dta, replace
clear mata
log close



